#!/usr/bin/python

import code,pickle,sys,os,re
import matplotlib
if "DISPLAY" not in os.environ: matplotlib.use("AGG")
else: matplotlib.use("GTK")
from ocrolib import dbtables
from pylab import *
from optparse import OptionParser
import traceback
import sqlite3

# Command line handling: one optional flag plus one or more database paths.
parser = OptionParser(usage="""
usage: %prog [options] database.db ...

Show information about the given databases.
""")
parser.add_option("-f","--full",action="store_true",help="full class counts")
options,args = parser.parse_args()

# With no database arguments there is nothing to report; show the usage
# text and exit.
if not args:
    parser.print_help()
    sys.exit(0)

# Length of the longest database path; the one-line summaries use it as the
# field width so that the file names line up in a right-aligned column.
fw = max(len(path) for path in args)

for dbfile in args:
    db = sqlite3.connect(dbfile)
    cur = db.cursor()
    names = cur.execute("select name from sqlite_master where type='table'")
    tables = [row[0] for row in names]

    for table in tables:
        counts = cur.execute("select cls,count(*) from %s group by cls order by cls"%(table))
        if options.full:
            print
            print "===",dbfile,":",table,"==="
            print
            sql = cur.execute("select sql from sqlite_master where type='table' and name='%s'"%(table))
            print list(sql)[0][0]
            print
            for count in counts:
                print "%6s\t%6d"%("[%s]"%count[0],count[1])
            print "empty",empty
            print "space",space
            print "control",control
            print "reject",reject
            print "unlabeled",unlabeled
            print "upper",upper
            print "lower",lower
            print "digit",digit
            print "nonascii",nonascii
            print "multi",multi
        else:
            empty = 0
            space = 0
            control = 0
            reject = 0
            unlabeled = 0
            upper = 0
            lower = 0
            digit = 0
            nonascii = 0
            multi = 0
            for count in counts:
                cls,n = count
                if cls is None or len(cls)<1: empty += n
                elif len(cls)>1: multi += n
                elif cls==" ": space += n
                elif cls=="_": unlabeled += n
                elif cls=="~": reject += n
                elif ord(cls)<32: control += n
                elif re.match('[a-z]',cls): lower += n
                elif re.match('[A-Z]',cls): upper += n
                elif re.match('[0-9]',cls): digit += n
                elif ord(cls[0])>=128: nonascii += n
            print "%*s: "%(fw,dbfile),
            sys.stdout.flush()
            print "rej %7d uc %7d lc %7d dig %7d na %7d _ %7d lig %7d"%(reject,upper,lower,digit,nonascii,unlabeled,multi)
